Baf53cre ENS neurons, SI data
CR infection 7d, CTL x4 CKO(Ahr-cko) x4
loading 60k cells,
cellranger called 35,059 cells
filt.10x <- Read10X(data.dir = "../output_plus/filtered_feature_bc_matrix/")
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
GEX <- filt.10x$`Gene Expression`
FB <- filt.10x$`Antibody Capture`
dim(GEX)
## [1] 32285 35059
GEX[1:6,1:6]
## 6 x 6 sparse Matrix of class "dgCMatrix"
## AAACCCAAGAAGTCTA-1 AAACCCAAGACATACA-1 AAACCCAAGACTCATC-1
## Xkr4 . 1 .
## Gm1992 . 1 2
## Gm19938 1 . .
## Gm37381 . . .
## Rp1 . . .
## Sox17 . . .
## AAACCCAAGAGGATGA-1 AAACCCAAGAGGTTTA-1 AAACCCAAGATACCAA-1
## Xkr4 4 2 .
## Gm1992 1 1 .
## Gm19938 1 . .
## Gm37381 . . .
## Rp1 . . .
## Sox17 . . .
dim(FB)
## [1] 8 35059
FB[,1:6]
## 8 x 6 sparse Matrix of class "dgCMatrix"
## AAACCCAAGAAGTCTA-1 AAACCCAAGACATACA-1 AAACCCAAGACTCATC-1
## CTL.1 56 93 82
## CTL.2 12 24 15
## CTL.3 60 160 21
## CTL.4 32 39 93
## CKO.1 36 92 51
## CKO.2 . 1 1
## CKO.3 68 80 71
## CKO.4 127 195 191
## AAACCCAAGAGGATGA-1 AAACCCAAGAGGTTTA-1 AAACCCAAGATACCAA-1
## CTL.1 119 969 346
## CTL.2 144 31 18
## CTL.3 21 26 18
## CTL.4 40 32 38
## CKO.1 79 83 57
## CKO.2 2 1 2
## CKO.3 243 124 220
## CKO.4 238 284 198
rowSums(FB)
## CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2 CKO.3 CKO.4
## 6725956 1213800 1019608 1958441 3855728 44350 4876856 10727664
rowSums(FB>0)
## CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2 CKO.3 CKO.4
## 35054 34991 35023 35047 35046 17297 35055 35056
# build seurat object
FB.seur <- CreateSeuratObject(counts = FB,project = "CR7d_SI")
FB.seur
## An object of class Seurat
## 8 features across 35059 samples within 1 assay
## Active assay: RNA (8 features, 0 variable features)
rownames(FB.seur)
## [1] "CTL.1" "CTL.2" "CTL.3" "CTL.4" "CKO.1" "CKO.2" "CKO.3" "CKO.4"
perform Seurat ’Demultiplexing with hashtag oligos(HTOs)
ref https://satijalab.org/seurat/articles/hashing_vignette.html
# normalize
FB.seur <- NormalizeData(FB.seur)
FB.seur <- FindVariableFeatures(FB.seur, selection.method = "mean.var.plot")
FB.seur <- ScaleData(FB.seur, features = VariableFeatures(FB.seur))
## Centering and scaling data matrix
# independent assay for HTO data
FB.seur[['HTO']] <- CreateAssayObject(counts = FB)
FB.seur <- NormalizeData(FB.seur, assay = 'HTO', normalization.method = 'CLR')
## Normalizing across features
first define FB colors based on conditions
color.FB <- ggsci::pal_d3("category20c")(20)[c(1,6,11,16,
2,7,12,17
)]
level.FB <- c(rownames(FB.seur))
color.FBraw <- c("#FF6C91","lightgrey",color.FB)
scales::show_col(color.FBraw, ncol = 6)
scales::show_col(color.FB, ncol = 4)
par(mfrow=c(2,4))
#plot(sort(rowSums(t(FB))),
# xlab="reordered cells",
# ylab="UMI counts", log="xy",
# main="sum")
plot(sort(t(FB)[,1]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[1])
plot(sort(t(FB)[,2]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[2])
plot(sort(t(FB)[,3]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[3])
plot(sort(t(FB)[,4]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[4])
plot(sort(t(FB)[,5]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[5])
plot(sort(t(FB)[,6]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[6])
plot(sort(t(FB)[,7]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[7])
plot(sort(t(FB)[,8]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[8])
table.demux
## quantile Doublet CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2 CKO.3 CKO.4 Negative
## 1 0.50 32609 329 293 279 346 328 45 262 274 294
## 2 0.51 32500 344 317 298 347 334 48 272 287 312
## 3 0.52 32317 372 329 334 368 342 51 298 318 330
## 4 0.53 32248 381 338 346 367 358 52 304 318 347
## 5 0.54 32038 411 365 342 403 392 54 333 351 370
## 6 0.55 31958 416 381 357 416 398 54 339 357 383
## 7 0.56 31812 433 379 378 429 430 55 354 380 409
## 8 0.57 31751 437 389 395 440 431 55 360 382 419
## 9 0.58 31611 447 413 423 456 447 55 371 393 443
## 10 0.59 31441 478 432 418 483 477 59 391 417 463
## 11 0.60 31142 530 457 457 521 515 61 431 452 493
## 12 0.61 31056 539 471 470 533 520 62 446 461 501
## 13 0.62 30980 542 481 486 532 535 63 455 472 513
## 14 0.63 30761 573 516 494 581 567 66 477 491 533
## 15 0.64 30492 609 535 530 617 609 68 507 518 574
## 16 0.65 30409 616 551 545 635 607 71 515 525 585
## 17 0.66 30257 635 571 567 642 626 76 521 542 622
## 18 0.67 30233 633 575 568 644 631 76 522 545 632
## 19 0.68 29815 693 620 610 700 699 82 574 590 676
## 20 0.69 29716 697 633 630 716 707 82 586 596 696
## 21 0.70 29579 710 655 658 728 725 84 595 603 722
## 22 0.71 29202 771 682 691 781 784 88 629 663 768
## 23 0.72 29019 793 717 718 793 808 88 643 683 797
## 24 0.73 28888 809 744 738 796 816 91 659 696 822
## 25 0.74 28701 839 749 772 826 837 93 665 718 859
## 26 0.75 28368 891 814 796 856 888 96 689 759 902
## 27 0.76 28241 899 832 819 859 897 97 703 766 946
## 28 0.77 28019 919 843 856 894 920 101 735 790 982
## 29 0.78 27744 960 881 873 922 956 105 767 816 1035
## 30 0.79 27484 984 900 921 955 989 109 788 842 1087
## 31 0.80 27355 996 915 942 960 997 113 800 851 1130
## 32 0.81 26982 1049 946 960 1015 1048 124 836 886 1213
## 33 0.82 26805 1074 974 986 1031 1061 127 833 898 1270
## 34 0.83 23185 1623 1483 1416 1582 1572 82 1204 1259 1653
## 35 0.84 22994 1638 1517 1440 1599 1593 87 1203 1268 1720
## 36 0.85 22369 1725 1605 1488 1684 1682 92 1253 1330 1831
## 37 0.86 22093 1750 1637 1533 1705 1705 96 1262 1354 1924
## 38 0.87 21604 1808 1681 1568 1771 1783 105 1293 1389 2057
## 39 0.88 21161 1871 1711 1647 1813 1832 112 1307 1405 2200
## 40 0.89 20713 1923 1777 1669 1863 1882 117 1328 1437 2350
## 41 0.90 20330 1966 1809 1729 1912 1919 121 1351 1432 2490
## 42 0.91 19686 2043 1881 1786 1976 1995 138 1393 1462 2699
## 43 0.92 19114 2107 1937 1825 2013 2073 155 1407 1489 2939
## 44 0.93 18485 2193 2005 1870 2062 2115 158 1414 1528 3229
## 45 0.94 17819 2281 2079 1944 2109 2171 167 1397 1556 3536
## 46 0.95 17107 2344 2126 2001 2178 2238 178 1405 1564 3918
## 47 0.96 16356 2406 2183 2056 2214 2290 195 1416 1535 4408
## 48 0.97 15384 2488 2267 2096 2272 2394 205 1388 1510 5055
## 49 0.98 14144 2583 2358 2110 2360 2494 224 1370 1486 5930
## 50 0.99 10058 3024 2764 2451 2695 2972 167 1371 1536 8021
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,4))
plot(table.demux$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux[,grep("CTL|CKO",colnames(table.demux))]), type="p", col="#306000", pch=16, ylab="Singlet")
#plot(table.demux$quantile, pch=16, ylab="mod-quantile")
plot.new()
plot(table.demux[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux)[2+1])
plot(table.demux[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux)[2+2])
plot(table.demux[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux)[2+3])
plot(table.demux[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux)[2+4])
plot(table.demux[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux)[2+5])
plot(table.demux[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux)[2+6])
plot(table.demux[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux)[2+7])
plot(table.demux[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux)[2+8])
#table.demux.s
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,4))
plot(table.demux.s$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux.s$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux.s[,grep("CTL|CKO",colnames(table.demux.s))]), type="p", col="#306000", pch=16, ylab="Singlet")
#plot(table.demux.s$quantile, pch=16, ylab="mod-quantile")
plot.new()
plot(table.demux.s[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux.s)[2+1])
plot(table.demux.s[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux.s)[2+2])
plot(table.demux.s[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux.s)[2+3])
plot(table.demux.s[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux.s)[2+4])
plot(table.demux.s[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux.s)[2+5])
plot(table.demux.s[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux.s)[2+6])
plot(table.demux.s[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux.s)[2+7])
plot(table.demux.s[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux.s)[2+8])
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.91)
## Cutoff for CTL.1 : 140 reads
## Cutoff for CTL.2 : 27 reads
## Cutoff for CTL.3 : 25 reads
## Cutoff for CTL.4 : 53 reads
## Cutoff for CKO.1 : 79 reads
## Cutoff for CKO.2 : 1 reads
## Cutoff for CKO.3 : 153 reads
## Cutoff for CKO.4 : 354 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 19686 2699 12674
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 19686 2699 2043 1881 1786 1976 1995 138
## CKO.3 CKO.4
## 1393 1462
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.92)
## Cutoff for CTL.1 : 145 reads
## Cutoff for CTL.2 : 28 reads
## Cutoff for CTL.3 : 26 reads
## Cutoff for CTL.4 : 55 reads
## Cutoff for CKO.1 : 81 reads
## Cutoff for CKO.2 : 1 reads
## Cutoff for CKO.3 : 159 reads
## Cutoff for CKO.4 : 366 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 19114 2939 13006
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 19114 2939 2107 1937 1825 2013 2073 155
## CKO.3 CKO.4
## 1407 1489
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.93)
## Cutoff for CTL.1 : 150 reads
## Cutoff for CTL.2 : 29 reads
## Cutoff for CTL.3 : 27 reads
## Cutoff for CTL.4 : 57 reads
## Cutoff for CKO.1 : 84 reads
## Cutoff for CKO.2 : 1 reads
## Cutoff for CKO.3 : 166 reads
## Cutoff for CKO.4 : 380 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 18485 3229 13345
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 18485 3229 2193 2005 1870 2062 2115 158
## CKO.3 CKO.4
## 1414 1528
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.94)
## Cutoff for CTL.1 : 156 reads
## Cutoff for CTL.2 : 30 reads
## Cutoff for CTL.3 : 28 reads
## Cutoff for CTL.4 : 59 reads
## Cutoff for CKO.1 : 88 reads
## Cutoff for CKO.2 : 1 reads
## Cutoff for CKO.3 : 173 reads
## Cutoff for CKO.4 : 396 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 17819 3536 13704
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 17819 3536 2281 2079 1944 2109 2171 167
## CKO.3 CKO.4
## 1397 1556
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.95)
## Cutoff for CTL.1 : 163 reads
## Cutoff for CTL.2 : 32 reads
## Cutoff for CTL.3 : 29 reads
## Cutoff for CTL.4 : 61 reads
## Cutoff for CKO.1 : 92 reads
## Cutoff for CKO.2 : 1 reads
## Cutoff for CKO.3 : 182 reads
## Cutoff for CKO.4 : 415 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 17107 3918 14034
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 17107 3918 2344 2126 2001 2178 2238 178
## CKO.3 CKO.4
## 1405 1564
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.96)
## Cutoff for CTL.1 : 172 reads
## Cutoff for CTL.2 : 34 reads
## Cutoff for CTL.3 : 30 reads
## Cutoff for CTL.4 : 65 reads
## Cutoff for CKO.1 : 97 reads
## Cutoff for CKO.2 : 1 reads
## Cutoff for CKO.3 : 192 reads
## Cutoff for CKO.4 : 438 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 16356 4408 14295
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 16356 4408 2406 2183 2056 2214 2290 195
## CKO.3 CKO.4
## 1416 1535
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.97)
## Cutoff for CTL.1 : 183 reads
## Cutoff for CTL.2 : 36 reads
## Cutoff for CTL.3 : 32 reads
## Cutoff for CTL.4 : 69 reads
## Cutoff for CKO.1 : 103 reads
## Cutoff for CKO.2 : 1 reads
## Cutoff for CKO.3 : 205 reads
## Cutoff for CKO.4 : 467 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 15384 5055 14620
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 15384 5055 2488 2267 2096 2272 2394 205
## CKO.3 CKO.4
## 1388 1510
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.98)
## Cutoff for CTL.1 : 198 reads
## Cutoff for CTL.2 : 39 reads
## Cutoff for CTL.3 : 35 reads
## Cutoff for CTL.4 : 74 reads
## Cutoff for CKO.1 : 111 reads
## Cutoff for CKO.2 : 1 reads
## Cutoff for CKO.3 : 224 reads
## Cutoff for CKO.4 : 507 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 14144 5930 14985
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 14144 5930 2583 2358 2110 2360 2494 224
## CKO.3 CKO.4
## 1370 1486
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.99)
## Cutoff for CTL.1 : 224 reads
## Cutoff for CTL.2 : 45 reads
## Cutoff for CTL.3 : 39 reads
## Cutoff for CTL.4 : 83 reads
## Cutoff for CKO.1 : 125 reads
## Cutoff for CKO.2 : 2 reads
## Cutoff for CKO.3 : 256 reads
## Cutoff for CKO.4 : 575 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 10058 8021 16980
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 10058 8021 3024 2764 2451 2695 2972 167
## CKO.3 CKO.4
## 1371 1536
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.992)
## Cutoff for CTL.1 : 232 reads
## Cutoff for CTL.2 : 46 reads
## Cutoff for CTL.3 : 40 reads
## Cutoff for CTL.4 : 86 reads
## Cutoff for CKO.1 : 130 reads
## Cutoff for CKO.2 : 2 reads
## Cutoff for CKO.3 : 266 reads
## Cutoff for CKO.4 : 597 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 9498 8535 17026
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 9498 8535 3020 2809 2474 2695 2987 173
## CKO.3 CKO.4
## 1355 1513
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.994)
## Cutoff for CTL.1 : 242 reads
## Cutoff for CTL.2 : 49 reads
## Cutoff for CTL.3 : 42 reads
## Cutoff for CTL.4 : 90 reads
## Cutoff for CKO.1 : 136 reads
## Cutoff for CKO.2 : 2 reads
## Cutoff for CKO.3 : 279 reads
## Cutoff for CKO.4 : 625 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 8760 9292 17007
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 8760 9292 3061 2809 2452 2698 3013 185
## CKO.3 CKO.4
## 1331 1458
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.996)
## Cutoff for CTL.1 : 256 reads
## Cutoff for CTL.2 : 52 reads
## Cutoff for CTL.3 : 44 reads
## Cutoff for CTL.4 : 95 reads
## Cutoff for CKO.1 : 144 reads
## Cutoff for CKO.2 : 2 reads
## Cutoff for CKO.3 : 297 reads
## Cutoff for CKO.4 : 663 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 7948 10280 16831
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 7948 10280 3047 2830 2458 2638 3010 189
## CKO.3 CKO.4
## 1294 1365
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.998)
## Cutoff for CTL.1 : 281 reads
## Cutoff for CTL.2 : 57 reads
## Cutoff for CTL.3 : 48 reads
## Cutoff for CTL.4 : 104 reads
## Cutoff for CKO.1 : 157 reads
## Cutoff for CKO.2 : 2 reads
## Cutoff for CKO.3 : 327 reads
## Cutoff for CKO.4 : 729 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 6757 11921 16381
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 6757 11921 2957 2848 2382 2509 2999 199
## CKO.3 CKO.4
## 1226 1261
# seems have to select a much suitable cutoff for CKO.2 based on its counts distribution
# others could check q0.95~q0.99 to choose a better one
#
# CTL.1 q0.994 242
# CTL.2 q0.99 45
# CTL.3 q0.99 39
# CTL.4 q0.98 74
# CKO.1 q0.994 136
# CKO.2 UMI 330
# CKO.3 q0.98 224
# CKO.4 q0.95 415
#
cutoff.FB <- c(248,45,39,72,
156,330,224,430)
names(cutoff.FB) <- rownames(FB.seur)
cutoff.FB
## CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2 CKO.3 CKO.4
## 248 45 39 72 156 330 224 430
par(mfrow=c(2,4))
#plot(sort(rowSums(t(FB))),
# xlab="reordered cells",
# ylab="UMI counts", log="xy",
# main="sum")
plot(sort(t(FB)[,1]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])
plot(sort(t(FB)[,5]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])
plot(sort(t(FB)[,6]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])
must process CKO.2 specifically, or the overall result could be messed up
par(mfrow=c(2,4))
#plot(sort(rowSums(t(FB))),
# xlab="reordered cells",
# ylab="UMI counts", log="xy",
# main="sum")
plot(sort(t(FB)[,1], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])
plot(sort(t(FB)[,5], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])
plot(sort(t(FB)[,6], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])
## custom cutoff run this
custom.Demux <- function(FBobj,FBcutoff){
# define decision matrix
dd <- FBobj@assays[['HTO']]@counts
dd <- apply(dd, 2, function(x){
x > FBcutoff
})
x1 <- colSums(dd)
x1[x1>1] <- "Doublet"
x1[x1==0] <- "Negative"
x1[x1==1] <- "Singlet"
x2 <- x1
bc.slt <- names(x1)[x1=="Singlet"]
for(bc in bc.slt){
x2[bc] <- rownames(dd)[dd[,bc]]
}
FBdf <- data.frame(HTO_classification.global=factor(x1,
levels = c("Doublet","Negative","Singlet")),
hash.ID=factor(x2,
levels = c("Doublet","Negative",rownames(dd))))
return(FBdf)
}
df.FB <- custom.Demux(FB.seur,cutoff.FB)
## custom cutoff run this
dim(df.FB)
## [1] 35059 2
df.FB[1:10,]
## HTO_classification.global hash.ID
## AAACCCAAGAAGTCTA-1 Singlet CTL.3
## AAACCCAAGACATACA-1 Singlet CTL.3
## AAACCCAAGACTCATC-1 Singlet CTL.4
## AAACCCAAGAGGATGA-1 Doublet Doublet
## AAACCCAAGAGGTTTA-1 Singlet CTL.1
## AAACCCAAGATACCAA-1 Singlet CTL.1
## AAACCCAAGATGATTG-1 Doublet Doublet
## AAACCCAAGGCTCTAT-1 Singlet CKO.4
## AAACCCAAGGTCTTTG-1 Singlet CKO.3
## AAACCCAAGGTTGGAC-1 Singlet CTL.4
## custom cutoff run this
table(df.FB)
## hash.ID
## HTO_classification.global Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## Doublet 8617 0 0 0 0 0 0 0
## Negative 0 7359 0 0 0 0 0 0
## Singlet 0 0 3032 2930 2709 3294 2728 9
## hash.ID
## HTO_classification.global CKO.3 CKO.4
## Doublet 0 0
## Negative 0 0
## Singlet 1851 2530
## custom cutoff run this
FB.seur@meta.data[,c("HTO_classification.global","hash.ID")] <- df.FB
FB.seur@meta.data[1:4,]
## orig.ident nCount_RNA nFeature_RNA nCount_HTO nFeature_HTO
## AAACCCAAGAAGTCTA-1 CR7d_SI 391 7 391 7
## AAACCCAAGACATACA-1 CR7d_SI 684 8 684 8
## AAACCCAAGACTCATC-1 CR7d_SI 525 8 525 8
## AAACCCAAGAGGATGA-1 CR7d_SI 886 8 886 8
## HTO_maxID HTO_secondID HTO_margin HTO_classification
## AAACCCAAGAAGTCTA-1 CTL.3 CTL.4 0.7347680 CTL.3
## AAACCCAAGACATACA-1 CTL.3 CKO.1 1.3023967 CTL.3
## AAACCCAAGACTCATC-1 CTL.4 CTL.3 0.5069012 Negative
## AAACCCAAGAGGATGA-1 CTL.2 CKO.3 0.7489803 CTL.2
## HTO_classification.global hash.ID
## AAACCCAAGAAGTCTA-1 Singlet CTL.3
## AAACCCAAGACATACA-1 Singlet CTL.3
## AAACCCAAGACTCATC-1 Singlet CTL.4
## AAACCCAAGAGGATGA-1 Doublet Doublet
## default q0.99 run this
#FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
# levels = c("Doublet","Negative","Singlet"))
#FB.seur$hash.ID <- factor(as.character(FB.seur$hash.ID),
# levels = c("Doublet","Negative",rownames(FB.seur)))
sl_stat <- table(FB.seur$HTO_classification.global)
barplot(sl_stat,ylim = c(0,23500),col = c("#FF6C91","lightgrey","#7CAE00"),main = "Feature Barcode statistics")
text(x=1:3*1.2-0.5,y=sl_stat+1280,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
FB.seur@active.ident <- factor(FB.seur$HTO_maxID,levels=rownames(FB.seur))
RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]),same.y.lims= TRUE, ncol=4,
cols = color.FB)
FB.seur@meta.data$hash.ID.sort <- factor(as.character(FB.seur@meta.data$hash.ID),levels = c("Doublet","Negative",levels(FB.seur@active.ident)))
RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]), ncol=4,same.y.lims= TRUE,
cols = c("#FF6C91","lightgrey",color.FB),group.by = "hash.ID.sort")
# first, remove negative cells from th object
#FB.seur.subset <- FB.seur
# Calculate a tSNE embedding of the HTO data
#DefaultAssay(FB.seur.subset) <- "HTO"
#FB.seur.subset <- ScaleData(FB.seur.subset, features = rownames(FB.seur.subset), verbose = FALSE)
##FB.seur.subset <- RunPCA(FB.seur.subset, features = rownames(FB.seur.subset), approx = FALSE)
##FB.seur.subset <- RunTSNE(FB.seur.subset, dims = 1:8, perplexity = 500, check_duplicates = FALSE)
#FB.seur.subset <- RunPCA(FB.seur.subset, features = setdiff(rownames(FB.seur.subset),"CKO.2"), approx = FALSE)
#FB.seur.subset <- RunTSNE(FB.seur.subset, dims = 1:7, perplexity = 300, check_duplicates = FALSE)
#saveRDS(FB.seur.subset, "FB1222.seur.subset.rds")
FB.seur.subset <- readRDS("FB1222.seur.subset.rds")
multiplot(
DimPlot(FB.seur.subset, order = rev(levels(FB.seur@active.ident)),
cols = color.FB) + labs(title = "FB Max_ID"),
DimPlot(FB.seur.subset, order = c("Doublet",rev(levels(FB.seur@active.ident)),"Negative"), group.by = 'hash.ID', label = F,
cols = c("lightgrey",color.FB,"#FF6C91")) +
labs(title = "FB Filtered_ID") + theme(plot.title = element_text(hjust = 0)) ,
cols = 1)
##
## Attaching package: 'grid'
## The following object is masked from 'package:Biostrings':
##
## pattern
FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
levels = c("Doublet","Negative","Singlet"))
Idents(FB.seur) <- "HTO_classification.global"
VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38"))
VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38")) + geom_jitter(alpha=0.015)
table(FB.seur@meta.data$hash.ID.sort)
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 8617 7359 3032 2930 2709 3294 2728 9
## CKO.3 CKO.4
## 1851 2530
par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,12800),
col = c("#FF6C91","lightgrey",color.FB),
main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+875,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
mainly follow https://satijalab.org/seurat/articles/pbmc3k_tutorial.html
GEX.seur <- CreateSeuratObject(counts = GEX,
min.cells = 3,
min.features = 200,
project = "CR7d_SI")
GEX.seur
## An object of class Seurat
## 24081 features across 35059 samples within 1 assay
## Active assay: RNA (24081 features, 0 variable features)
grep("^mt-",rownames(GEX),value = T)
## [1] "mt-Nd1" "mt-Nd2" "mt-Co1" "mt-Co2" "mt-Atp8" "mt-Atp6" "mt-Co3"
## [8] "mt-Nd3" "mt-Nd4l" "mt-Nd4" "mt-Nd5" "mt-Nd6" "mt-Cytb"
MT_gene <- grep("^mt-",rownames(GEX.seur),value = T)
GEX.seur[["percent.mt"]] <- PercentageFeatureSet(GEX.seur, features = MT_gene)
RB_gene <- grep("^Rps|^Rpl",rownames(GEX.seur),value = T)
GEX.seur[["percent.rb"]] <- PercentageFeatureSet(GEX.seur, features = RB_gene)
# Visualize QC metrics as a violin plot
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.01)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)
plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt")
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc
GEX.seur$FB.info <- FB.seur@meta.data[rownames(GEX.seur@meta.data),"hash.ID"]
#head(GEX.seur@meta.data)
table(GEX.seur$FB.info)
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 8617 7359 3032 2930 2709 3294 2728 9
## CKO.3 CKO.4
## 1851 2530
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0.1, split.by = "FB.info")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0, split.by = "FB.info")
GEX.seur <- subset(GEX.seur, subset = FB.info!="Doublet" & FB.info!="Negative" & FB.info!="CKO.2")
GEX.seur
## An object of class Seurat
## 24081 features across 19074 samples within 1 assay
## Active assay: RNA (24081 features, 0 variable features)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0.1, split.by = "FB.info")
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0, split.by = "FB.info")
plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt")
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc
GEX.seur <- subset(GEX.seur, subset = percent.mt < 5 & nFeature_RNA > 500 & nFeature_RNA < 4000 & nCount_RNA < 10000)
GEX.seur
## An object of class Seurat
## 24081 features across 18878 samples within 1 assay
## Active assay: RNA (24081 features, 0 variable features)
filtered obj
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = color.FBraw,
pt.size = 0.1, split.by = "FB.info")
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = color.FBraw,
pt.size = 0, split.by = "FB.info")
plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt")
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc
par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,10800),
col = c("#FF6C91","lightgrey",color.FB),
main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+675,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
par(mar=c(6,4,2,3))
sl_stat <- table(GEX.seur$FB.info)
barplot(sl_stat,ylim = c(0,6800),
col = c("#FF6C91","lightgrey",color.FB),
main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+475,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
s.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$s.genes))
g2m.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$g2m.genes))
GEX.seur <- CellCycleScoring(GEX.seur,
s.features = s.genes,
g2m.features = g2m.genes)
## Warning: The following features are not present in the object: E2f8, not
## searching for symbol synonyms
## Warning: The following features are not present in the object: Cdc25c, not
## searching for symbol synonyms
VlnPlot(GEX.seur, features = c("S.Score", "G2M.Score"), group.by = "FB.info",
ncol = 2, pt.size = 0.1)
GEX.seur.cc <- GEX.seur
GEX.seur.cc <- NormalizeData(GEX.seur.cc)
GEX.seur.cc <- FindVariableFeatures(GEX.seur.cc)
GEX.seur.cc <- ScaleData(GEX.seur.cc)
Idents(GEX.seur.cc) <- "Phase"
RidgePlot(GEX.seur.cc, features = c("Pcna", "Top2a", "Mcm6", "Mki67"), ncol = 2)
GEX.seur.cc <- RunPCA(GEX.seur.cc, features = c(s.genes, g2m.genes))
DimPlot(GEX.seur.cc, reduction = 'pca')
GEX.seur <- NormalizeData(GEX.seur, normalization.method = "LogNormalize", scale.factor = 10000)
GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)
# Identify the 10 most highly variable genes
#top10 <- head(VariableFeatures(GEX.seur), 10)
top20 <- head(VariableFeatures(GEX.seur), 20)
# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top20, repel = T)
plot1 + plot2
head(VariableFeatures(GEX.seur), 200)
## [1] "Wdr17" "Gal" "Mgat4c" "Zfp804a"
## [5] "Cntn5" "Adgrg6" "Gm30613" "Klhl1"
## [9] "Kcnb2" "Sst" "Robo2" "Gm32647"
## [13] "Cdh8" "Cpne4" "Gm29521" "Brinp3"
## [17] "Ntng1" "Cntnap2" "Gm21847" "Nrxn3"
## [21] "Ano2" "Cdh19" "Pdzrn4" "Cdh9"
## [25] "Gpr149" "Ntrk3" "Lingo2" "Nmu"
## [29] "Ebf1" "Nrg1" "Sgcz" "Kcnip4"
## [33] "Gm15261" "Tmeff2" "Zfp804b" "Dgkg"
## [37] "Astn2" "Pcdh9" "Cmah" "Cdh18"
## [41] "Grm5" "Clstn2" "Myh11" "Gm29771"
## [45] "Car10" "March1" "Cemip" "Prkg1"
## [49] "Rbpms" "Nxph2" "Cdh6" "Sema5a"
## [53] "Schip1" "Vip" "Gpc5" "Piezo2"
## [57] "Chsy3" "Ano1" "Gpm6a" "Vwc2l"
## [61] "Grin3a" "Rasgef1b" "Fgf14" "Dcc"
## [65] "Penk" "Meis2" "Asic2" "Pbx3"
## [69] "4930509J09Rik" "Efr3a" "Pcdh10" "Plxna4"
## [73] "Col4a6" "Kcnq5" "Fmo2" "4930432L08Rik"
## [77] "Unc5d" "Egfem1" "Itgb6" "Ccbe1"
## [81] "Cysltr2" "Dkk2" "Zfhx3" "Gm15680"
## [85] "Cpa6" "Gm20754" "Spock3" "9530059O14Rik"
## [89] "Myl1" "Abca9" "8030451A03Rik" "4931415C17Rik"
## [93] "Pde1a" "Gm26632" "Col25a1" "Csmd3"
## [97] "Acp7" "Kcnh7" "Tnr" "Robo1"
## [101] "Muc16" "Trhde" "Serpini1" "Skap1"
## [105] "AC150683.1" "Dpp10" "Dgkb" "Gm15584"
## [109] "Cacna2d3" "Bmpr1b" "4930422I22Rik" "A330008L17Rik"
## [113] "Luzp2" "Fut9" "Prkcq" "Sulf1"
## [117] "Nxph1" "Plcl1" "Gm49938" "Gm20713"
## [121] "Cfh" "Sox2ot" "Foxp2" "Gna14"
## [125] "Gm38505" "Tac1" "Trpm5" "Gm26612"
## [129] "Gm11342" "5530401A14Rik" "Csmd1" "Cntn4"
## [133] "9530026P05Rik" "Tafa2" "Chrm3" "Trps1"
## [137] "1700034P13Rik" "Pde4d" "Pcdh11x" "Bmp5"
## [141] "Kctd16" "Arhgap6" "Cux2" "Il1rapl1"
## [145] "Galnt18" "Kif26b" "Rab27b" "Gm49953"
## [149] "Gm16685" "Gm13561" "Kctd8" "D030068K23Rik"
## [153] "Kcnd2" "Hpse2" "mt-Co3" "Myh3"
## [157] "Adgrd1" "Ghr" "Prune2" "Arhgap15"
## [161] "B230312C02Rik" "Calcb" "Gm49127" "1700063D05Rik"
## [165] "1700111E14Rik" "mt-Atp6" "Slc4a4" "Ano5"
## [169] "Mme" "Lama2" "Hs6st3" "Grik1"
## [173] "Dgki" "Chrna9" "Gm20560" "Arhgap18"
## [177] "Gm11339" "Rasgrf1" "Rerg" "Mecom"
## [181] "B230323A14Rik" "Galr1" "Nell1" "Iqgap2"
## [185] "Gm28494" "Galntl6" "Gm30382" "Stk31"
## [189] "Ror1" "Nos1" "C7" "4930587E11Rik"
## [193] "Aebp1" "Ttc29" "Gm16226" "Flrt2"
## [197] "Gm12239" "mt-Co1" "Olfr1369-ps1" "Fgl2"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix
# exclude MT genes and more
#DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^Gm|^Hist",rownames(GEX.seur),value = T)
DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^AC|^AA|^AW|^BC|^Gm|^Hist|Rik$|-ps",
rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))
VariableFeatures(GEX.seur) <- setdiff(VariableFeatures(object = GEX.seur),
c(MT_gene,
DIG,
CC_gene) )
GEX.seur <- RunPCA(GEX.seur, features = VariableFeatures(GEX.seur))
## PC_ 1
## Positive: Lrrtm4, Galntl6, Tox, Ndst4, Grik1, Bnc2, Pcdh7, Tshz2, Cdc14a, Specc1
## Kcnd2, Chrna7, Epha5, Galnt17, Plcxd3, Syt6, Lrp1b, Fbxw15, Zfp536, Brinp2
## St6galnac3, Fbxw24, Kcnab1, Synpo2, Ptprt, Dock2, Oprk1, Nos1, Rspo2, Slc35f4
## Negative: Ntrk3, Ano2, Tmeff2, Robo2, Cdh8, Cpne4, Nrxn3, Mgat4c, Cntn5, Clstn2
## Adgrg6, Zfp804a, Myl1, Spock3, Gpr149, Pdzrn4, Cdh6, Cysltr2, Pcdh10, Grin3a
## Sgcz, Dgkg, Astn2, Cacna2d3, Kif26b, Plxna4, Itgb6, Cux2, Iqgap2, Zfhx3
## PC_ 2
## Positive: Rbfox1, Ptprt, Bnc2, Tafa1, Tshz2, Gpc6, Frmd4b, Grik1, St6galnac3, Fbxw15
## Tcf7l2, Brinp2, Cdc14a, Plcxd3, Tox, Fbxw24, Tmem132c, Dock2, Pcdh7, Caln1
## Pld5, Oprk1, Specc1, Unc5c, Adamtsl1, Slc35f4, Ccbe1, Nfia, Sdk2, Dsc3
## Negative: Nos1, Egfem1, Auts2, Kcnq5, Cadps2, Etv1, Epha5, Dach1, Gfra1, Asic2
## Kcnab1, Schip1, Alcam, Cmah, Dgkb, Kcnt2, Stxbp6, Rgs6, Hs6st3, Tmem108
## Adgrl3, Lrrc4c, Creb5, Cdh11, Fat3, Ebf1, Il1rapl1, Kcnj3, Tenm3, Nxn
## PC_ 3
## Positive: Cdh18, Kcnip4, Csmd3, Kctd8, Klhl1, Gpc6, Cadm2, Htr4, Pbx3, Cntn3
## Meis1, Dlc1, Gabrg3, Skap1, Csmd1, Car10, Tenm2, Pde10a, C79798, Serpini1
## March1, Dmd, Vwc2l, Unc5c, Sema5a, Khdrbs2, Pde4d, Sphkap, Plcl1, Sv2c
## Negative: Sgcd, Adgrg6, Cysltr2, Nos1, Gpr149, Nmu, Slc4a4, Grin3a, Itgb6, Dapk2
## Ano2, Dgkg, Rgs6, Nfia, Ccbe1, Cbln2, Ngfr, Kcnab1, Lhfpl2, Cpne4
## Otof, Gfra1, Syt2, Zfp536, Zfp804a, Efr3a, Pex7, Calcb, Smad6, Htr3b
## PC_ 4
## Positive: Klhl1, March1, Vwc2l, Sema5a, Pbx3, Cdh9, Zfhx3, Rasgef1b, C79798, Galnt18
## Kcnh7, Zbbx, Il1rapl1, Prune2, Olfr78, Lncbate1, Bcl2, Mgat4c, Scgn, Alk
## Tmeff2, Kif26b, Dpp10, Ntm, Pakap, Tenm4, Auts2, Serpini1, Vcan, Aff3
## Negative: Dock10, Lingo2, Kcnip4, Ndst4, Nxph1, Sorcs1, Csmd3, Tac1, Gda, Lrp1b
## Cntn5, Lrrc7, Lrrtm4, Thsd4, Cntn3, Epha5, Nyap2, Dmd, Sgcz, Pld5
## Robo1, Lsamp, Kctd8, Abca5, Mctp1, Ccdc60, Lama2, Unc5d, Prkg1, Ryr3
## PC_ 5
## Positive: Kcnt2, Dgkb, Prkg1, Thsd7b, Lrrc4c, Alcam, Cdh20, Khdrbs2, Rgs6, Epha5
## Car10, Klhl1, Gucy1a2, Plcl1, Vwc2l, Ptprz1, Stard13, Rasgef1b, Man1a, Vcan
## Galntl6, Dlgap1, Mgat4c, Il1rapl1, Hmcn1, Dpp10, Slc44a5, P3h2, Kctd8, Slc2a13
## Negative: Trhde, Ebf1, Cntn4, Trps1, Nrg1, Cpa6, Chsy3, Zmat4, Csmd1, Ptprd
## Col18a1, Gal, Kcnd2, Nkain3, Ntng1, Rmst, Npas3, Egfem1, Myo1e, Shisa6
## Sctr, Sdk1, Plcxd3, Myo16, Ptprt, Luzp2, Fstl4, Moxd1, Snca, Nav2
length(setdiff(VariableFeatures(object = GEX.seur),
c(MT_gene,DIG,CC_gene)))
## [1] 1084
setdiff(VariableFeatures(object = GEX.seur),
c(MT_gene,DIG,CC_gene))[1:300]
## [1] "Wdr17" "Gal" "Mgat4c" "Zfp804a" "Cntn5" "Adgrg6"
## [7] "Klhl1" "Kcnb2" "Sst" "Robo2" "Cdh8" "Cpne4"
## [13] "Brinp3" "Ntng1" "Cntnap2" "Nrxn3" "Ano2" "Cdh19"
## [19] "Pdzrn4" "Cdh9" "Gpr149" "Ntrk3" "Lingo2" "Nmu"
## [25] "Ebf1" "Nrg1" "Sgcz" "Kcnip4" "Tmeff2" "Zfp804b"
## [31] "Dgkg" "Astn2" "Pcdh9" "Cmah" "Cdh18" "Grm5"
## [37] "Clstn2" "Myh11" "Car10" "March1" "Cemip" "Prkg1"
## [43] "Rbpms" "Nxph2" "Cdh6" "Sema5a" "Schip1" "Vip"
## [49] "Gpc5" "Piezo2" "Chsy3" "Ano1" "Gpm6a" "Vwc2l"
## [55] "Grin3a" "Rasgef1b" "Fgf14" "Dcc" "Penk" "Meis2"
## [61] "Asic2" "Pbx3" "Efr3a" "Pcdh10" "Plxna4" "Col4a6"
## [67] "Kcnq5" "Fmo2" "Unc5d" "Egfem1" "Itgb6" "Ccbe1"
## [73] "Cysltr2" "Dkk2" "Zfhx3" "Cpa6" "Spock3" "Myl1"
## [79] "Abca9" "Pde1a" "Col25a1" "Csmd3" "Acp7" "Kcnh7"
## [85] "Tnr" "Robo1" "Muc16" "Trhde" "Serpini1" "Skap1"
## [91] "Dpp10" "Dgkb" "Cacna2d3" "Bmpr1b" "Luzp2" "Fut9"
## [97] "Prkcq" "Sulf1" "Nxph1" "Plcl1" "Cfh" "Sox2ot"
## [103] "Foxp2" "Gna14" "Tac1" "Trpm5" "Csmd1" "Cntn4"
## [109] "Tafa2" "Chrm3" "Trps1" "Pde4d" "Pcdh11x" "Bmp5"
## [115] "Kctd16" "Arhgap6" "Cux2" "Il1rapl1" "Galnt18" "Kif26b"
## [121] "Rab27b" "Kctd8" "Kcnd2" "Hpse2" "Myh3" "Adgrd1"
## [127] "Ghr" "Prune2" "Arhgap15" "Calcb" "Slc4a4" "Ano5"
## [133] "Mme" "Lama2" "Hs6st3" "Grik1" "Dgki" "Chrna9"
## [139] "Arhgap18" "Rasgrf1" "Rerg" "Mecom" "Galr1" "Nell1"
## [145] "Iqgap2" "Galntl6" "Stk31" "Ror1" "Nos1" "C7"
## [151] "Aebp1" "Ttc29" "Flrt2" "Fgl2" "Svep1" "Pdgfra"
## [157] "Col8a1" "Synpr" "Kcnmb2" "C79798" "Esrrb" "Dcn"
## [163] "Fbxl7" "Wt1" "Nox3" "Cntn3" "Adamts9" "Met"
## [169] "Cdh10" "Upk1b" "Cadm2" "Cbln2" "Thsd7b" "Adam2"
## [175] "Cav1" "Chrdl1" "Alcam" "Il18r1" "Pde4c" "Thsd4"
## [181] "C3" "Sctr" "Myocd" "Ntm" "Hs3st4" "Rxfp1"
## [187] "Grm7" "Sorcs3" "Dock10" "Dapk2" "Glyat" "Msln"
## [193] "Mast4" "Slc44a4" "Cadps2" "Galnt13" "Ngfr" "Ephb1"
## [199] "Olfr78" "Opcml" "Plxdc2" "Slc6a16" "Mid1" "Tbx20"
## [205] "Npas3" "Zfp286os" "Gabrg3" "Tenm2" "Tenm4" "Igfbp6"
## [211] "Abca8a" "Kit" "Mrvi1" "Rmst" "Etl4" "Ppp3ca"
## [217] "Tnc" "Ldlrad2" "Kirrel3" "Atg4a" "Prr16" "Mir99ahg"
## [223] "Gsta2" "Rarb" "Aff2" "Tex15" "Dab1" "Casp8"
## [229] "Zbbx" "Dach1" "Dpp4" "Calcrl" "Etv1" "F13a1"
## [235] "Tenm1" "Hccs" "Serpine2" "Adamts6" "Otof" "Rtl4"
## [241] "Sorcs1" "Slc39a8" "Wt1os" "Cpne8" "Lmx1a" "Hcn1"
## [247] "Adamtsl3" "Pkhd1l1" "Stxbp6" "Nell1os" "Itgbl1" "Sntg2"
## [253] "Aldh1a2" "Fstl4" "Grp" "Moxd1" "Bfsp2" "Adgrl4"
## [259] "Scnn1a" "Ptk7" "Tafa1" "Aspa" "Vcan" "Loxl1"
## [265] "Inhbc" "Pde7b" "Rarres2" "Lhfpl2" "Nkain3" "Wee2"
## [271] "Xlr4a" "Btk" "Necab1" "Slc44a5" "Wbp2nl" "Scgn"
## [277] "Ptprt" "Lrp2" "L3mbtl4" "Lrrc4c" "Nwd1" "Scn7a"
## [283] "Pantr1" "Lmcd1" "Nek1" "Upk3b" "Asb17" "Bnc1"
## [289] "Gda" "Auts2" "Gabrb1" "Htr4" "Fcrl5" "Col18a1"
## [295] "Ntrk2" "Serpinb5" "Slco1a1" "Prkca" "Prkag2" "Pakap"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "FB.info", cols = color.FB) +
DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "FB.info", cols = color.FB)
DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)
ElbowPlot(GEX.seur,ndims = 50)
PCs <- 1:25
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param = 20)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph' ,resolution = 2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 18878
## Number of edges: 755688
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8372
## Number of communities: 33
## Elapsed time: 3 seconds
GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity = 100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 20, seed.use = 145)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 23:42:35 UMAP embedding parameters a = 0.9922 b = 1.112
## 23:42:35 Read 18878 rows and found 25 numeric columns
## 23:42:35 Using Annoy for neighbor search, n_neighbors = 20
## 23:42:35 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 23:42:38 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\RtmpAbnhPC\file824713335f0
## 23:42:38 Searching Annoy index using 1 thread, search_k = 2000
## 23:42:42 Annoy recall = 100%
## 23:42:43 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 23:42:44 Initializing from normalized Laplacian + noise (using irlba)
## 23:42:45 Commencing optimization for 200 epochs, with 568600 positive edges
## 23:43:04 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)
FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"))
DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info",
ncol = 4, cols = color.FB)
#DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info",
# ncol = 3, cols = color.FB)
GEX.seur$cnt <- factor(gsub(".1$|.2$|.3$|.4$","",as.character(GEX.seur$FB.info)),
levels = c("CTL",
"CKO"))
color.cnt <- color.FB[c(1,5)]
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)
markers.new.s <- list(EMN=c("Chat","Bnc2","Tox","Ptprt",
"Gfra2","Oprk1","Adamtsl1",
"Fbxw15","Fbxw24","Chrna7",
"Satb1","Cntnap5b",
"Gabrb1","Nxph1","Lama2","Lrrc7",
"Ryr3","Eda","Tac1",
"Kctd8","Ntrk2","Penk",
"Fut9","Nfatc1","Egfr","Mgll",
"Chrm3"
),
IMN=c("Nos1","Kcnab1",
"Gfra1","Etv1",
"Man1a","Airn",
"Adcy2","Cmah","Creb5","Vip","Pde1a",
"Ebf1","Gpc5"
),
IN=c("Npas3","Synpr","St18","Gal",
"Kcnk13",
"Neurod6","Moxd1","Sctr",
"Piezo1","Sst","Adamts9","Kcnn2",
"Calb2"),
IPAN=c("Calcb","Nmu","Adgrg6","Pcdh10",
"Ngfr","Galr1","Il7","Aff2",
"Gpr149","Cdh6","Cdh8",
"Clstn2","Ano2","Ntrk3",
"Cpne4","Vwc2l","Cdh9","Scgn",
"Vcan","Cck","Piezo2","Kcnh7",
"Rerg","Bmpr1b","Skap1","Ntng1",
"Tafa2","Nxph2"),
CONTAM=c("Actl6b","Phox2b","Sox10","Plp1",
"Gfap","Rxrg","Pdgfra","Acta2"))
pm.CL_new.s <- DotPlot(subset(GEX.seur,subset=cnt %in% c("CTL")), features = as.vector(unlist(markers.new.s)), group.by = "seurat_clusters",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CTL only")
pm.CL_new.s
pm.All_new.s <- DotPlot(GEX.seur, features = as.vector(unlist(markers.new.s)), group.by = "seurat_clusters",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="All")
pm.All_new.s
library(DoubletFinder)
for(i in 1:length(sweep.res.list)){
if(length(sweep.res.list[[i]]$pANN[is.nan(sweep.res.list[[i]]$pANN)]) !=0){
if(i!=1){
sweep.res.list[[i]] <- sweep.res.list[[i-1]]
}else(
sweep.res.list[[i]] <- sweep.res.list[[i+1]]
)
}
}
sweep.stats <- summarizeSweep(sweep.res.list, GT=FALSE)
bcmvn <- find.pK(sweep.stats)
## NULL
pk_v <- as.numeric(as.character(bcmvn$pK))
pk_good <- pk_v[bcmvn$BCmetric==max(bcmvn$BCmetric)]
# specify expected doublet number
nExp_poi <- round(0.05*length(colnames(GEX.seur)))
GEX.seur <- doubletFinder_v3(GEX.seur, PCs = PCs, pN = 0.25, pK = pk_good,
nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 6293 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
colnames(GEX.seur@meta.data)[ncol(GEX.seur@meta.data)] <- "DoubletFinder0.05"
# specify expected doublet number
nExp_poi <- round(0.1*length(colnames(GEX.seur)))
GEX.seur <- doubletFinder_v3(GEX.seur, PCs = PCs, pN = 0.25, pK = pk_good,
nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 6293 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
colnames(GEX.seur@meta.data)[ncol(GEX.seur@meta.data)] <- "DoubletFinder0.1"
DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.05") +
DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.1")
GEX.seur$sort_clusters <- factor(as.character(GEX.seur$seurat_clusters),
levels = c(0,3,4,14,10,8,1,17,28, # EMN
5,2,6,7,13,16,26, # IMN
9,20, 22, # IN
15,11, 12,21, 29,19,32, # IPAN
27,25,18,31,24, # Mix
30, # Glia
23 # SMC
))
# preAnno1
GEX.seur$preAnno1 <- as.character(GEX.seur$seurat_clusters)
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(0,4,3,14,10,8,1,17,28)] <- "EMN"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(5,2,6,7,13,16,26)] <- "IMN"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(9,20, 22)] <- "IN"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(15,11, 12,21, 29,32,19)] <- "IPAN"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(27,25,30,18,31,24)] <- "MIX"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(30)] <- "Glia"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(23)] <- "SMC"
GEX.seur$preAnno1 <- factor(GEX.seur$preAnno1,
levels = c("EMN","IMN","IN","IPAN",
"MIX","Glia","SMC"))
# preAnno2
GEX.seur$preAnno2 <- as.character(GEX.seur$seurat_clusters)
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(0,4,3,14)] <- "EMN1"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(10)] <- "EMN2"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(8,1)] <- "EMN3"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(17,28)] <- "EMN4"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(5,2,6,7)] <- "IMN1"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(13)] <- "IMN2"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(16)] <- "IMN3"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(26)] <- "IMN4"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(9)] <- "IN1"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(20)] <- "IN2"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(22)] <- "IN3"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(15,11)] <- "IPAN1"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(12,21)] <- "IPAN2"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(29,32)] <- "IPAN3"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(19)] <- "IPAN4"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(27,25,30,18,31,24)] <- "MIX"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(30)] <- "Glia"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(23)] <- "SMC"
GEX.seur$preAnno2 <- factor(GEX.seur$preAnno2,
levels = c(paste0("EMN",1:4),
paste0("IMN",1:4),
paste0("IN",1:3),
paste0("IPAN",1:4),
"MIX","Glia","SMC"))
pm.CL_new.s <- DotPlot(subset(GEX.seur,subset=cnt %in% c("CTL")), features = as.vector(unlist(markers.new.s)), group.by = "sort_clusters",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CL only")
pm.CL_new.s
pm.All_new.s <- DotPlot(GEX.seur, features = as.vector(unlist(markers.new.s)), group.by = "sort_clusters",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="All")
pm.All_new.s
pm.CL_new.s <- DotPlot(subset(GEX.seur,subset=cnt %in% c("CTL")), features = as.vector(unlist(markers.new.s)), group.by = "preAnno2",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CL only")
pm.CL_new.s
pm.All_new.s <- DotPlot(GEX.seur, features = as.vector(unlist(markers.new.s)), group.by = "preAnno2",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="All")
pm.All_new.s
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "preAnno2") +
DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)
#saveRDS(GEX.seur,"GEX0112.seur.pre.rds")
GEX.pure <- subset(GEX.seur, subset = preAnno1 %in% levels(GEX.seur$preAnno1)[1:4] & DoubletFinder0.1 == "Singlet")
GEX.pure
## An object of class Seurat
## 24081 features across 16562 samples within 1 assay
## Active assay: RNA (24081 features, 1084 variable features)
## 3 dimensional reductions calculated: pca, tsne, umap
pm.CL_new.s <- DotPlot(subset(GEX.pure,subset=cnt %in% c("CTL")), features = as.vector(unlist(markers.new.s)), group.by = "preAnno2",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CL only")
pm.CL_new.s
pm.All_new.s <- DotPlot(GEX.pure, features = as.vector(unlist(markers.new.s)), group.by = "preAnno2",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="All")
pm.All_new.s
DimPlot(GEX.pure, reduction = "umap", label = T, group.by = "preAnno2") +
DimPlot(GEX.pure, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)
cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.pure$FB.info,
clusters=GEX.pure$preAnno2)[c(3:7,9,10),c(1:15)],
main = "Cell Count",
gaps_row = c(4),
gaps_col = c(4,8,11,15),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,
pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.pure$FB.info,
clusters=GEX.pure$preAnno2)[c(3:7,9,10),c(1:15)]),
main = "Cell Ratio",
gaps_row = c(4),
gaps_col = c(4,8,11,15),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)
GEX.pure$rep <- paste0("rep",
gsub("CTL|CKO","",as.character(GEX.pure$FB.info)))
stat_preAnno2 <- GEX.pure@meta.data[,c("cnt","rep","preAnno2")]
stat_preAnno2.s <- stat_preAnno2 %>%
group_by(cnt,rep,preAnno2) %>%
summarise(count=n()) %>%
mutate(prop= count/sum(count)) %>%
ungroup
## `summarise()` has grouped output by 'cnt', 'rep'. You can override using the
## `.groups` argument.
pcom.preAnno2 <- stat_preAnno2.s %>%
ggplot(aes(x = preAnno2, y = 100*prop, color=cnt)) +
geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
theme_classic(base_size = 15) +
scale_color_manual(values = c("skyblue","pink"), name="") +
labs(title="Cell Composition of SI data, preAnno2", y = "Proportion") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6)) +
geom_point(aes(x=preAnno2, y = 100*prop, color= cnt),
position = position_dodge(0.75),
shape=16,alpha=0.75,size=2.15,
stroke=0.15, show.legend=F)
pcom.preAnno2
glm.nb - anova.LRT
Sort.N <- c("CTL","CKO")
glm.nb_anova.preAnno2 <- list()
for(x1 in 1:2){
for(x2 in 1:2){
if(x2>x1){
stat_preAnno2.s_N <- stat_preAnno2.s %>% filter(cnt %in% c(Sort.N[x1],Sort.N[x2]))
total.N <- stat_preAnno2.s_N %>%
group_by(cnt, rep) %>%
summarise(total=sum(count)) %>% ungroup() %>% as.data.frame()
rownames(total.N) <- paste0(as.character(total.N$cnt),
"_",
as.character(total.N$rep))
stat_preAnno2.s_N$total <- total.N[paste0(stat_preAnno2.s_N$cnt,
"_",
stat_preAnno2.s_N$rep),"total"]
glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- list()
for(AA in levels(stat_preAnno2.s_N$preAnno2)){
glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]][[AA]] <- tryCatch({
aaa <- stat_preAnno2.s_N %>% filter(preAnno2==AA)
aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=1000)
aaa.anova <- anova(aaa.glmnb, test = "LRT")
aaa.anova$'Pr(>Chi)'[2]
}, error=function(e){
tryCatch({
aaa <- stat_preAnno2.s_N %>% filter(preAnno2==AA)
aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=100)
aaa.anova <- anova(aaa.glmnb, test = "LRT")
aaa.anova$'Pr(>Chi)'[2]
}, error=function(e){
tryCatch({
aaa <- stat_preAnno2.s_N %>% filter(preAnno2==AA)
aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=10)
aaa.anova <- anova(aaa.glmnb, test = "LRT")
aaa.anova$'Pr(>Chi)'[2]
}, error = function(e){
NA
})
})
})
}
glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- unlist(glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]])
}
}
}
glm.nb_anova.preAnno2_df <- t(data.frame(Reduce(function(x,y){rbind(x,y)},glm.nb_anova.preAnno2)))
rownames(glm.nb_anova.preAnno2_df) <- names(glm.nb_anova.preAnno2)
colnames(glm.nb_anova.preAnno2_df) <- gsub("X","C",colnames(glm.nb_anova.preAnno2_df))
glm.nb_anova.preAnno2_df
## EMN1 EMN2 EMN3 EMN4 IMN1 IMN2 IMN3
## CTLvsCKO 0.3533798 0.2524307 0.9089482 0.5325865 0.279355 0.100509 0.8366743
## IMN4 IN1 IN2 IN3 IPAN1 IPAN2
## CTLvsCKO 0.753595 0.0003353706 0.4826819 0.4567131 0.07933713 0.9827769
## IPAN3 IPAN4 MIC Glia SMC
## CTLvsCKO 0.5146459 0.3950726 NA NA NA